{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8767a312-fb93-406e-a6e4-4a8d72aca03f",
   "metadata": {},
   "source": [
    "# 矩阵运算示例"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7b9a8665-1a25-4a7f-9ac3-716c184a9b7e",
   "metadata": {},
   "source": [
    "### 使用Python代码创建矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aaecfb4b-4740-47f1-81a5-f6960d6921da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a matrix in plain Python: a 3x2 matrix stored as a list of row lists.\n",
    "A = [\n",
    "    [1, 2],\n",
    "    [3, 4],\n",
    "    [5, 6],\n",
    "]\n",
    "\n",
    "# Row vector: the first row as a flat list, then wrapped in a list (1x2 matrix).\n",
    "first_row = A[0]\n",
    "print(first_row)\n",
    "print([first_row])\n",
    "\n",
    "# Column vector: element 1 of every row, flat and then wrapped in a list (1x3 matrix).\n",
    "second_column = [row[1] for row in A]\n",
    "print(second_column)\n",
    "print([second_column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa20747e-11f2-4c2d-a882-184cb32638fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Matrix addition example: element-wise sum of two matrices of the same shape.\n",
    "A = [[1, 2], [3, 4], [5, 6]]\n",
    "B = [[7, 8], [9, 10], [11, 12]]\n",
    "\n",
    "# Take the dimensions from the data instead of hard-coding 3 and 2, so the\n",
    "# loops stay correct if A and B are swapped for matrices of another size.\n",
    "n_rows, n_cols = len(A), len(A[0])\n",
    "if len(B) != n_rows or any(len(row) != n_cols for row in A + B):\n",
    "    raise ValueError(\"A and B must have the same shape\")\n",
    "\n",
    "# Result matrix, zero-initialised with the same shape as the operands.\n",
    "C = [[0 for _ in range(n_cols)] for _ in range(n_rows)]\n",
    "\n",
    "for i in range(n_rows):      # iterate over rows\n",
    "    for j in range(n_cols):  # iterate over columns\n",
    "        C[i][j] = A[i][j] + B[i][j]\n",
    "\n",
    "print(\"矩阵A + 矩阵B 的结果：\")\n",
    "for row in C:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c5416e8-244b-4794-bdc0-5037de5d7013",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Matrix multiplication example. The product A x B requires the number of columns\n",
    "# of A to equal the number of rows of B; the result has as many rows as A and as\n",
    "# many columns as B.\n",
    "# Here A is 3x2 and B is 2x3, so C is 3x3.\n",
    "A = [[1, 2], [3, 4], [5, 6]]\n",
    "B = [[7, 8, 9], [10, 11, 12]]\n",
    "\n",
    "# Read the dimensions off the operands rather than hard-coding them, and verify\n",
    "# that the inner dimensions agree before multiplying.\n",
    "n_rows, n_inner, n_cols = len(A), len(B), len(B[0])\n",
    "if any(len(row) != n_inner for row in A):\n",
    "    raise ValueError(\"columns of A must equal rows of B\")\n",
    "\n",
    "# Result matrix, zero-initialised so the innermost loop can accumulate into it.\n",
    "C = [[0 for _ in range(n_cols)] for _ in range(n_rows)]\n",
    "\n",
    "for i in range(n_rows):            # rows of the first matrix\n",
    "    for j in range(n_cols):        # columns of the second matrix\n",
    "        for k in range(n_inner):   # columns of A (equivalently, rows of B)\n",
    "            C[i][j] += A[i][k] * B[k][j]\n",
    "\n",
    "print(\"矩阵A x 矩阵B 的结果：\")\n",
    "for row in C:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7122d20d-708e-49ae-afb2-3848c55c05ab",
   "metadata": {},
   "source": [
    "### 在NumPy中使用矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0aa15b90-b049-4a54-8648-01c5b4fa701e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic attributes of a NumPy array.\n",
    "import numpy as np\n",
    "\n",
    "values = [[1, 2, 3], [4, 5, 6]]\n",
    "arr = np.array(values, dtype=np.float64)\n",
    "\n",
    "print(f\"\\nArray:\\n{arr}\")\n",
    "\n",
    "# Expected for this array: ndim 2, shape (2, 3), size 6, dtype float64,\n",
    "# itemsize 8 (bytes per float64 element).\n",
    "print(f\"ndim: {arr.ndim}\")\n",
    "print(f\"shape: {arr.shape}\")\n",
    "print(f\"size: {arr.size}\")\n",
    "print(f\"dtype: {arr.dtype}\")\n",
    "print(f\"itemsize: {arr.itemsize}\")\n",
    "\n",
    "# Expected transpose:\n",
    "# [[1. 4.]\n",
    "#  [2. 5.]\n",
    "#  [3. 6.]]\n",
    "print(f\"Transpose:\\n{arr.T}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af5182ca-302f-4bb4-b6bb-806ba5cacde3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Creating NumPy arrays from existing Python and NumPy objects.\n",
    "import numpy as np\n",
    "\n",
    "# 1-D array from a Python list\n",
    "list1 = [1, 2, 3, 4, 5]\n",
    "arr_from_list = np.array(list1)\n",
    "print(\"从列表创建的数组:\\n\", arr_from_list)\n",
    "\n",
    "# 2-D array from a nested Python list\n",
    "list2d = [[1, 2, 3], [4, 5, 6]]\n",
    "arr_from_list2d = np.array(list2d)\n",
    "print(\"\\n从嵌套列表创建的数组:\\n\", arr_from_list2d)\n",
    "\n",
    "# Array from a Python tuple\n",
    "tuple1 = (10, 20, 30)\n",
    "arr_from_tuple = np.array(tuple1)\n",
    "print(\"\\n从元组创建的数组:\\n\", arr_from_tuple)\n",
    "\n",
    "# Array with an explicit data type\n",
    "arr_with_dtype = np.array([1, 2, 3], dtype=np.float64)\n",
    "print(\"\\n指定数据类型为 float64 的数组:\\n\", arr_with_dtype)\n",
    "\n",
    "# Minimum number of dimensions: ndmin=2 produces a 2-D array of shape (1, 3)\n",
    "arr_ndmin = np.array([1, 2, 3], ndmin=2)\n",
    "print(\"\\n指定最小维数为 2 的数组:\\n\", arr_ndmin)\n",
    "print(\"形状:\", arr_ndmin.shape)\n",
    "\n",
    "# From an existing ndarray: np.array copies by default (copy=True), so writing\n",
    "# to the copy leaves the original untouched.\n",
    "original_arr = np.array([1, 2, 3])\n",
    "copied_arr = np.array(original_arr)\n",
    "copied_arr[0] = 99\n",
    "print(\"\\n原始数组:\", original_arr)\n",
    "print(\"复制的数组 (修改后):\", copied_arr)\n",
    "\n",
    "# Without copying when possible: np.asarray hands back the input when it is\n",
    "# already an ndarray of a matching dtype, so no new data buffer is created.\n",
    "# np.array(..., copy=False) is avoided because NumPy 2.x changed its meaning from\n",
    "# \"copy only if needed\" to \"never copy, raise ValueError if a copy is required\".\n",
    "original_arr2 = np.array([1, 2, 3])\n",
    "view_arr = np.asarray(original_arr2)\n",
    "view_arr[0] = 99\n",
    "print(\"\\n原始数组:\", original_arr2)  # the original changed too: both names share the same data\n",
    "print(\"视图数组 (修改后):\", view_arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "38ecdb9a-c92d-4566-b39a-7b792ae6d582",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evenly spaced samples over an interval with np.linspace.\n",
    "import numpy as np\n",
    "\n",
    "# 5 evenly spaced points from 0 to 10, both endpoints included\n",
    "arr1 = np.linspace(0, 10, num=5)\n",
    "print(\"np.linspace(0, 10, 5):\\n\", arr1)\n",
    "\n",
    "# 5 points starting at 0, with the stop value 10 excluded\n",
    "arr2 = np.linspace(0, 10, num=5, endpoint=False)\n",
    "print(\"\\nnp.linspace(0, 10, 5, endpoint=False):\\n\", arr2)\n",
    "\n",
    "# 11 points from -5 to 5; retstep=True also returns the spacing between samples\n",
    "arr3, step = np.linspace(-5, 5, num=11, retstep=True)\n",
    "print(\"\\nnp.linspace(-5, 5, 11, retstep=True):\\n\", arr3)\n",
    "print(\"步长:\", step)\n",
    "\n",
    "# 20 points from 0 to 1 stored as float32\n",
    "arr4 = np.linspace(0, 1, num=20, dtype=np.float32)\n",
    "print(\"\\nnp.linspace(0, 1, 20, dtype=np.float32):\\n\", arr4)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b4641e0b-4682-4b6a-83b9-f54a66bf0ba1",
   "metadata": {},
   "source": [
    "#### 数组形状操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f08543b-7190-4a2a-b476-b916982206db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Changing the shape of an array with reshape().\n",
    "import numpy as np\n",
    "\n",
    "a = np.arange(12)      # [ 0  1  2  3  4  5  6  7  8  9 10 11]\n",
    "b = a.reshape((3, 4))  # same 12 elements arranged as 3 rows x 4 columns\n",
    "print(\"\\nreshape() example:\")\n",
    "print(\"Original (a):\\n\", a)\n",
    "print(\"Reshaped (b):\\n\", b)\n",
    "# [[ 0  1  2  3]\n",
    "#  [ 4  5  6  7]\n",
    "#  [ 8  9 10 11]]\n",
    "\n",
    "# Write through b to find out whether reshape returned a view or a copy.\n",
    "b[0, 0] = 99\n",
    "print(\"Modified b:\\n\", b)\n",
    "print(\"Original a after b modification:\\n\", a)  # a[0] is 99 as well, so b is a view of a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b471e913-6965-4aac-aca8-e12011e37752",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Changing the size of an array in place with ndarray.resize().\n",
    "import numpy as np\n",
    "\n",
    "c = np.array([[1, 2], [3, 4]])\n",
    "print(\"\\nresize() example:\")\n",
    "print(\"Original c:\\n\", c)\n",
    "c.resize(3, 2)  # grows c in place; the added row is zero-filled\n",
    "print(\"Resized c (larger, filled with 0):\\n\", c)\n",
    "# [[1 2]\n",
    "#  [3 4]\n",
    "#  [0 0]]\n",
    "\n",
    "d = np.arange(6)  # evenly spaced integers: [0 1 2 3 4 5]\n",
    "print(\"Original d:\", d)\n",
    "d.resize(2, 2)  # shrinks d in place; the trailing elements are dropped\n",
    "print(\"Resized d (smaller, truncated):\", d)  # [[0 1] [2 3]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54cdecb5-525a-46b8-a052-d6a3d90a213b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flattening a 2-D array: ravel() versus flatten().\n",
    "import numpy as np\n",
    "\n",
    "arr_2d = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "print(\"\\nravel() example:\")\n",
    "print(\"Original 2D array:\\n\", arr_2d)\n",
    "arr_raveled = np.ravel(arr_2d)\n",
    "print(\"Raveled array:\", arr_raveled)  # [1 2 3 4 5 6]\n",
    "\n",
    "# flatten() returns a copy: writing to it leaves arr_2d unchanged.\n",
    "arr_flattened = arr_2d.flatten()\n",
    "arr_flattened[0] = 200\n",
    "print(\"Modified flattened array:\", arr_flattened)\n",
    "print(\"Original 2D array after modification:\\n\", arr_2d)  # arr_2d[0, 0] is still 1\n",
    "\n",
    "# ravel() returned a view here: writing to it changes arr_2d as well.\n",
    "arr_raveled[0] = 100\n",
    "print(\"Modified raveled array:\", arr_raveled)\n",
    "print(\"Original 2D array after modification:\\n\", arr_2d)  # arr_2d[0, 0] is now 100"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "454dcb0c-5597-4748-b959-1fd1e9551391",
   "metadata": {},
   "source": [
    "#### 计算与统计示例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31640aca-cf56-449e-8f13-d4a860b10f91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum, mean, standard deviation and variance: over the whole array and per axis.\n",
    "import numpy as np\n",
    "\n",
    "arr = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "print(\"\\nsum() example:\")\n",
    "print(\"Original array:\\n\", arr)\n",
    "print(\"Sum of all elements:\", np.sum(arr))                 # 1+2+3+4+5+6 = 21\n",
    "print(\"Sum along axis 0 (columns):\", np.sum(arr, axis=0))  # [1+4, 2+5, 3+6] = [5 7 9]\n",
    "print(\"Sum along axis 1 (rows):\", np.sum(arr, axis=1))     # [1+2+3, 4+5+6] = [ 6 15]\n",
    "print(\"Sum with keepdims=True (axis=1):\\n\", np.sum(arr, axis=1, keepdims=True))\n",
    "# [[ 6]\n",
    "#  [15]]  -> the reduced axis is kept, so the shape is (2, 1)\n",
    "\n",
    "print(\"\\nmean() example:\")\n",
    "print(\"Mean of all elements:\", np.mean(arr))    # 21 / 6 = 3.5\n",
    "print(\"Mean along axis 0:\", np.mean(arr, axis=0))  # [2.5 3.5 4.5]\n",
    "print(\"Mean along axis 1:\", np.mean(arr, axis=1))  # [2. 5.]\n",
    "\n",
    "# Standard deviation and variance\n",
    "print(\"\\n整个数组的标准差:\", arr.std())  # 1.707825127659933\n",
    "print(\"按列标准差:\", arr.std(axis=0))     # [1.5 1.5 1.5]\n",
    "\n",
    "print(\"\\n整个数组的方差:\", arr.var())    # 2.9166666666666665\n",
    "print(\"按列方差:\", arr.var(axis=0))       # [2.25 2.25 2.25]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "126aa883-02bc-494d-a365-a6dfb431ee94",
   "metadata": {},
   "source": [
    "#### 排序示例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c0ab03f-2c28-46a4-9a9c-2917f0652e78",
   "metadata": {},
   "outputs": [],
   "source": [
    "# In-place sorting along each axis with ndarray.sort().\n",
    "import numpy as np\n",
    "\n",
    "unsorted_rows = [[3, 1, 4], [1, 5, 9]]\n",
    "arr = np.array(unsorted_rows)\n",
    "print(\"\\nsort() example:\")\n",
    "print(\"Original array:\\n\", arr)\n",
    "\n",
    "arr.sort(axis=1)  # sort within each row, modifying arr in place\n",
    "print(\"Sorted along axis 1 (rows):\\n\", arr)\n",
    "# [[1 3 4]\n",
    "#  [1 5 9]]\n",
    "\n",
    "arr.sort(axis=0)  # sort within each column, modifying arr in place\n",
    "print(\"Sorted along axis 0 (columns):\\n\", arr)\n",
    "# [[1 3 4]\n",
    "#  [1 5 9]]  (applied on top of the row sort; every column was already ascending)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ef8a0164-f69b-440d-af92-5eaf3bab22a0",
   "metadata": {},
   "source": [
    "#### 随机数生成示例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b678130d-31c3-45a0-8f0d-8e7ea23c5a04",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random sampling with the legacy np.random functions.\n",
    "import numpy as np\n",
    "\n",
    "print(\"\\n--- Legacy API - rand / randn ---\")\n",
    "uniform_sample = np.random.rand(2, 3)  # 2x3, uniform distribution\n",
    "print(\"np.random.rand(2, 3):\\n\", uniform_sample)\n",
    "\n",
    "normal_sample = np.random.randn(2, 2)  # 2x2, standard normal distribution\n",
    "print(\"np.random.randn(2, 2):\\n\", normal_sample)\n",
    "\n",
    "scaled_sample = np.random.normal(loc=10, scale=2, size=5)  # mean 10, standard deviation 2\n",
    "print(\"np.random.normal(10, 2, size=5):\", scaled_sample)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f51b702-517b-4f30-b43c-6d361b950fd2",
   "metadata": {},
   "source": [
    "### 矩阵运算示例"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b701b799-014f-42d3-8725-566e81f4ade9",
   "metadata": {},
   "source": [
    "#### 初始化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92b8d5a3-6a37-495e-ae64-ae6c36709041",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Fix the random seed so the results are reproducible (optional).\n",
    "np.random.seed(42)\n",
    "\n",
    "# --- Initialise the operands ---\n",
    "# The random draws are made in the order A, B, C, bias.\n",
    "A = np.random.rand(3, 4)  # 3x4 matrix of random floats in [0, 1)\n",
    "B = np.random.rand(4, 3)  # 4x3 matrix of random floats in [0, 1)\n",
    "C = np.random.rand(3, 4)  # same shape as A, used for the element-wise operations\n",
    "# Bias vector used for broadcasting: the result of A @ B is 3x3, so the bias can\n",
    "# have shape (3,) or (1, 3); shape (3,) is used here.\n",
    "bias = np.random.rand(3)\n",
    "# A plain scalar\n",
    "scalar = 0.5\n",
    "\n",
    "print(\"--- 初始化矩阵 ---\")\n",
    "print(f\"矩阵 A (3x4):\\n{A}\\n\")\n",
    "print(f\"矩阵 B (4x3):\\n{B}\\n\")\n",
    "print(f\"矩阵 C (3x4):\\n{C}\\n\")\n",
    "print(f\"标量 scalar: {scalar}\\n\")\n",
    "print(f\"偏置向量 bias (shape {bias.shape}): {bias}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a129127-5408-4f81-b040-cef999eb9b16",
   "metadata": {},
   "source": [
    "#### 矩阵乘法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f74b2291-4b0e-460c-8858-329be30a455c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 1. Matrix multiplication ---\n",
    "# One of the core operations in large models, used for linear transformations etc.\n",
    "# A (3x4) times B (4x3) gives a 3x3 result. np.matmul is the function form of the\n",
    "# @ operator; np.dot(A, B) is an alternative for 2-D arrays.\n",
    "matmul_AB = np.matmul(A, B)\n",
    "print(\"--- 1. 矩阵乘法 (A @ B) ---\")\n",
    "print(f\"形状: {matmul_AB.shape}\")\n",
    "print(f\"结果 (3x3):\\n{matmul_AB}\\n\")\n",
    "\n",
    "# Swapping the operands changes the result shape: B (4x3) times A (3x4) is 4x4.\n",
    "matmul_BA = np.matmul(B, A)\n",
    "print(\"--- 1. 矩阵乘法 (B @ A) ---\")\n",
    "print(f\"形状: {matmul_BA.shape}\")\n",
    "print(f\"结果 (4x4):\\n{matmul_BA}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2e249a9-7096-4bcb-9f9d-ecf02b23286f",
   "metadata": {},
   "source": [
    "#### 元素加法 (Element-wise Addition) & 广播"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89d9a153-490c-4361-9518-3b3d725d61a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 2. Element-wise addition & broadcasting ---\n",
    "# Used for adding biases, residual connections, etc.\n",
    "# Sum of two matrices with identical shapes.\n",
    "add_AC = np.add(A, C)\n",
    "print(\"--- 2. 元素加法 (A + C) ---\")\n",
    "print(f\"形状: {add_AC.shape}\")\n",
    "print(f\"结果 (3x4):\\n{add_AC}\\n\")\n",
    "\n",
    "# Broadcasting: bias has shape (3,); it is treated as (1, 3) and stretched to\n",
    "# (3, 3), so the same bias vector is added to every row of matmul_AB (3x3).\n",
    "add_bias = np.add(matmul_AB, bias)\n",
    "print(\"--- 2. 元素加法 (广播) (A @ B + bias) ---\")\n",
    "print(f\"形状: {add_bias.shape}\")\n",
    "print(f\"结果 (3x3):\\n{add_bias}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e940b2ab-2019-4336-ae15-b09b875e0e6e",
   "metadata": {},
   "source": [
    "#### 元素乘法 (Element-wise Multiplication / Hadamard Product)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfe29987-50c7-48d0-938e-c08842bd5239",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 3. Element-wise multiplication (Hadamard product) ---\n",
    "# Used for gating mechanisms, scaling, etc.\n",
    "# A and C have the same shape, so corresponding elements are multiplied pairwise.\n",
    "elemwise_mul = np.multiply(A, C)\n",
    "print(\"--- 3. 元素乘法 (A * C) ---\")\n",
    "print(f\"形状: {elemwise_mul.shape}\")\n",
    "print(f\"结果 (3x4):\\n{elemwise_mul}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "01312ad7-0976-47e1-a3ac-819c8980d946",
   "metadata": {},
   "source": [
    "#### 转置 (Transpose)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd2cbe95-db7c-4def-a88f-9481ef7570a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 4. Transpose ---\n",
    "# Frequently used in attention mechanisms, among other places.\n",
    "transpose_A = A.transpose()\n",
    "transpose_B = B.transpose()\n",
    "print(\"--- 4. 转置 ---\")\n",
    "print(f\"A 的转置 (A.T) (形状 {transpose_A.shape}):\\n{transpose_A}\\n\")\n",
    "print(f\"B 的转置 (B.T) (形状 {transpose_B.shape}):\\n{transpose_B}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "badd936e-95a8-42ef-9157-ea5e050d12c6",
   "metadata": {},
   "source": [
    "#### 激活函数 (Element-wise Activation - 以 ReLU 为例)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1e56b76-d98d-4a85-85a6-26fef1734dc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 5. Activation function (element-wise; ReLU as the example) ---\n",
    "# ReLU maps every element x to max(0, x). The entries of A are drawn from [0, 1),\n",
    "# so applying ReLU to A would change nothing; a standard-normal matrix, which also\n",
    "# contains negative values, makes the effect visible.\n",
    "temp_mat = np.random.randn(3, 4)\n",
    "relu_temp = np.maximum(0, temp_mat)  # negative entries are clamped to 0\n",
    "print(\"--- 5. 激活函数 (ReLU 应用于随机矩阵) ---\")\n",
    "print(f\"临时随机矩阵 (3x4):\\n{temp_mat}\\n\")\n",
    "print(f\"应用 ReLU 后的结果 (3x4):\\n{relu_temp}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "074a116d-d873-4cdc-9c19-48f0450bc935",
   "metadata": {},
   "source": [
    "#### 求和/归约 (Summation/Reduction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de35e99f-9f47-44cd-b3fd-8c67d7dcbd5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 6. Summation / reduction ---\n",
    "# Used for normalisation, loss computation, etc.\n",
    "sum_A_all = A.sum()          # sum of every element of A\n",
    "sum_A_axis0 = A.sum(axis=0)  # reduces over axis 0 (the rows): one sum per column, shape (4,)\n",
    "sum_A_axis1 = A.sum(axis=1)  # reduces over axis 1 (the columns): one sum per row, shape (3,)\n",
    "print(\"--- 6. 求和/归约 ---\")\n",
    "print(f\"A 所有元素之和: {sum_A_all}\")\n",
    "print(f\"A 沿轴 0 (列) 求和 (shape {sum_A_axis0.shape}): {sum_A_axis0}\")\n",
    "print(f\"A 沿轴 1 (行) 求和 (shape {sum_A_axis1.shape}): {sum_A_axis1}\\n\")\n",
    "\n",
    "# mean() takes the same axis argument.\n",
    "mean_A_axis1 = A.mean(axis=1)\n",
    "print(f\"A 沿轴 1 (行) 求均值 (shape {mean_A_axis1.shape}): {mean_A_axis1}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b116c000-2a0b-45be-8ba2-7ba0d5b711cf",
   "metadata": {},
   "source": [
    "#### 标量乘法 (Scalar Multiplication)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d82e1568-12cb-4d1a-b8d1-9dadc693fc34",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- 7. Scalar multiplication ---\n",
    "# Used for scaling, learning-rate adjustment, etc.\n",
    "# Every element of A is multiplied by the same scalar; the shape is unchanged.\n",
    "scalar_mul_A = A * scalar\n",
    "print(\"--- 7. 标量乘法 (scalar * A) ---\")\n",
    "print(f\"形状: {scalar_mul_A.shape}\")\n",
    "print(f\"结果 (3x4):\\n{scalar_mul_A}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5050b401-2c8c-415b-b4a4-acbdbb625398",
   "metadata": {},
   "source": [
    "#### 其他常用运算（在大模型中不那么核心，但基础）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d905d0cf-a7b5-4bab-bd58-417009a67fae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inverse - only defined for square matrices that are invertible.\n",
    "# matmul_AB is a 3x3 square matrix, so we attempt to invert it.\n",
    "try:\n",
    "    inv_matmul_AB = np.linalg.inv(matmul_AB)\n",
    "except np.linalg.LinAlgError:\n",
    "    print(\"矩阵 matmul_AB 是奇异矩阵（不可逆）。\\n\")\n",
    "else:\n",
    "    print(\"--- 其他：求逆 (Inverse of A @ B) ---\")\n",
    "    print(f\"形状: {inv_matmul_AB.shape}\")\n",
    "    print(f\"结果 (3x3):\\n{inv_matmul_AB}\\n\")\n",
    "    # Check: (A @ B) @ inv(A @ B) should be close to the identity matrix.\n",
    "    # The product is rounded only to make the printed output easier to read.\n",
    "    identity_check = matmul_AB @ inv_matmul_AB\n",
    "    print(f\"验证 (A@B @ inv(A@B)) - 接近单位矩阵:\\n{np.round(identity_check, decimals=5)}\\n\")\n",
    "\n",
    "# Determinant - only defined for square matrices.\n",
    "det_matmul_AB = np.linalg.det(matmul_AB)\n",
    "print(\"--- 其他：行列式 (Determinant of A @ B) ---\")\n",
    "print(f\"结果 (标量): {det_matmul_AB}\\n\")\n",
    "\n",
    "# Trace (sum of the main diagonal) - only defined for square matrices.\n",
    "trace_matmul_AB = np.trace(matmul_AB)\n",
    "print(\"--- 其他：迹 (Trace of A @ B) ---\")\n",
    "print(f\"结果 (标量): {trace_matmul_AB}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a3b8381-3d5c-497a-93e8-e1db7a1af450",
   "metadata": {},
   "source": [
    "#### 奇异值分解 (Singular Value Decomposition - SVD)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8aae0f3e-2010-4adc-92ff-1c04fa88ebfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Singular value decomposition works for any m x n matrix.\n",
    "# U:  m x m orthogonal matrix; its columns are eigenvectors of A A^T (left singular vectors)\n",
    "# s:  singular values as a 1-D array, sorted in descending order\n",
    "# Vh: conjugate transpose of an n x n orthogonal matrix; its rows are eigenvectors of\n",
    "#     A^T A (right singular vectors). For a real matrix Vh is simply V transposed.\n",
    "U, s, Vh = np.linalg.svd(A)\n",
    "print(\"--- 新增：奇异值分解 (SVD) for A ---\")\n",
    "print(f\"矩阵 A (3x4):\\n{A}\\n\")\n",
    "print(f\"左奇异向量 U (形状 {U.shape}):\\n{U}\\n\")\n",
    "print(f\"奇异值 s (形状 {s.shape}):\\n{s}\\n\")\n",
    "print(f\"右奇异向量的共轭转置 Vh (形状 {Vh.shape}):\\n{Vh}\\n\")\n",
    "\n",
    "# Check the decomposition: U @ Sigma @ Vh should reproduce the original A.\n",
    "# s is 1-D, so it must first be placed on the diagonal of an m x n matrix.\n",
    "# There are min(m, n) singular values; sizing the diagonal block by len(s) rather\n",
    "# than by the row count keeps this correct when A has more rows than columns.\n",
    "Sigma = np.zeros(A.shape)\n",
    "n_singular = s.size\n",
    "Sigma[:n_singular, :n_singular] = np.diag(s)\n",
    "reconstructed_A = U @ Sigma @ Vh\n",
    "assert np.allclose(reconstructed_A, A), \"SVD reconstruction does not match A\"\n",
    "print(f\"通过 SVD 重构的矩阵 A (接近原始 A):\\n{np.round(reconstructed_A, decimals=5)}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36c5cdd2-5403-4fc0-a783-329bb02a0000",
   "metadata": {},
   "source": [
    "#### 矩阵/向量范数 (Norm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "120afa12-3a09-422e-b421-3da4aab783d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Signature: np.linalg.norm(x, ord=None, axis=None, keepdims=False)\n",
    "# The ord argument selects the kind of norm:\n",
    "#   ord=1     -> L1 norm (vector: sum of absolute values; matrix: largest absolute column sum)\n",
    "#   ord=2     -> L2 norm (vector: square root of the sum of squares; matrix: largest singular value)\n",
    "#   ord='fro' -> Frobenius norm (square root of the sum of squares of all matrix elements)\n",
    "\n",
    "print(\"--- 新增：矩阵/向量范数 ---\")\n",
    "\n",
    "# Matrix L1 norm (maximum absolute column sum)\n",
    "norm_A_L1 = np.linalg.norm(A, 1)\n",
    "print(f\"矩阵 A 的 L1 范数 (列和范数): {norm_A_L1:.4f}\\n\")\n",
    "\n",
    "# Matrix L2 norm (spectral norm)\n",
    "norm_A_L2 = np.linalg.norm(A, 2)\n",
    "print(f\"矩阵 A 的 L2 范数 (最大奇异值): {norm_A_L2:.4f}\\n\")\n",
    "\n",
    "# Frobenius norm (F-norm)\n",
    "norm_A_Fro = np.linalg.norm(A, 'fro')\n",
    "print(f\"矩阵 A 的 Frobenius 范数: {norm_A_Fro:.4f}\\n\")\n",
    "\n",
    "# L1 norm of the bias vector (Manhattan norm)\n",
    "norm_bias_L1 = np.linalg.norm(bias, 1)\n",
    "print(f\"向量 bias 的 L1 范数: {norm_bias_L1:.4f}\\n\")\n",
    "\n",
    "# L2 norm of the bias vector (Euclidean norm)\n",
    "norm_bias_L2 = np.linalg.norm(bias, 2)\n",
    "print(f\"向量 bias 的 L2 范数: {norm_bias_L2:.4f}\\n\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
